To detect different gene expression in a small subpopulation (<1%) of skin, call Merkel cell, upon shh, fgf20, and edar mutation. Aim to analyze the connection between the three different pathways, and try to get clue of the genes participating in Merkel cell development.
E15.5 embryo skin from 2 mutant lines:
edar mutant = mutation in gene ectodysplasin-A receptor, Single point mutation (Mutation details: This allele involves a G to A transition mutation at nucleotide 1,135 that causes the amino acid change: glutamate to lysine at position 379 (E379K). (J:56496))
phenotype (http://www.informatics.jax.org/allele/allgenoviews/MGI:1856018) = abnormal coat/hair morphology, darkened coat color
fgf20 mutant = fgf20-β-galactosidase knock-in allele phenotype= no guard hair in adult mouse back skin
source("http://bioconductor.org/biocLite.R")
## Bioconductor version 3.2 (BiocInstaller 1.20.0), ?biocLite for help
#biocLite("edgeR")
library(Rsubread)
library(limma)
library(edgeR)
library(ggplot2)
library(rgl)
library(knitr)
knit_hooks$set(rgl = function(before, options, envir) {
if (!before) {
## after a chunk has been evaluated
if (rgl.cur() == 0) return() # no active device
name = paste(options$fig.path, options$label, sep = '')
rgl.snapshot(paste(name, '.png', sep = ''), fmt = 'png')
return(paste('\\includegraphics{', name, '}\n', sep = ''))
}
})
knit_hooks$set(webgl = hook_webgl)
#Command line version
module load subread
x=$(ls *.bam)
featureCounts -p -T 8 -s 2 -p -t exon -g gene_id -a /data/maggiec/RNASeq/Genomes/mm10/gencode.vM4.all.gtf -o counts_ss.txt $x
#Used R version:
gtf="data/maggiec/RNASeq/Genomes/mm10/gencode.vM4.all.gtf"
targets <- readTargets()
fc <- featureCounts(files=targets$bam,isGTFAnnotationFile=TRUE,nthreads=32,
annot.ext=gtf,GTF.attrType="gene_name",strandSpecific=2,isPairedEnd=TRUE)
x <- DGEList(counts=fc$counts, genes=fc$annotation)
setwd("/Users/maggiec/GitHub/Maggie/ccbr620/")
#load("data/ssData.RData")
#fc=fc_ss
load("data/RNASeq_orig.RData")
mapped=read.delim(file = "data/mapped.txt",header = TRUE)
targets$mapped=mapped$mapped
ls()
## [1] "fc" "gtf" "mapped" "targets"
targets
## bam Phenotype Batch mapped
## 1 Sample_e10.bam edar/+ control 1 76698538
## 2 Sample_e1.bam edar/+ control 2 96350389
## 3 Sample_e2.bam edar/+ control 2 98212381
## 4 Sample_e4.bam edar/edar mutant 2 104174178
## 5 Sample_e5.bam edar/edar mutant 2 97482428
## 6 Sample_e9.bam edar/edar mutant 1 56689636
## 7 Sample_f1.bam fgf20lz/+ control 3 75223785
## 8 Sample_f2.bam fgf20lz/+ control 3 81814741
## 9 Sample_f4.bam fgf20lz/lz mutant 3 53524329
## 10 Sample_f5.bam fgf20lz/+ control 3 79315207
## 11 Sample_f6.bam fgf20lz/lz mutant 3 84455037
## 12 Sample_f7.bam fgf20lz/lz mutant 3 80703536
fc$stat
## Status Sample_e10.bam Sample_e1.bam Sample_e2.bam
## 1 Assigned 30137063 38131187 38018334
## 2 Unassigned_Ambiguity 338418 423143 427446
## 3 Unassigned_MultiMapping 5540811 6724855 7144799
## 4 Unassigned_NoFeatures 2332980 2896013 3515618
## 5 Unassigned_Unmapped 0 0 0
## 6 Unassigned_MappingQuality 0 0 0
## 7 Unassigned_FragementLength 0 0 0
## 8 Unassigned_Chimera 0 0 0
## 9 Unassigned_Secondary 0 0 0
## 10 Unassigned_Nonjunction 0 0 0
## 11 Unassigned_Duplicate 0 0 0
## Sample_e4.bam Sample_e5.bam Sample_e9.bam Sample_f1.bam Sample_f2.bam
## 1 39859361 37639209 21918421 29010380 31527017
## 2 464930 429850 251806 328766 361974
## 3 7824053 7377360 4328405 5049329 5805782
## 4 3938754 3294804 1846191 3223421 3212604
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## 7 0 0 0 0 0
## 8 0 0 0 0 0
## 9 0 0 0 0 0
## 10 0 0 0 0 0
## 11 0 0 0 0 0
## Sample_f4.bam Sample_f5.bam Sample_f6.bam Sample_f7.bam
## 1 20773860 30420667 32484170 31229089
## 2 226857 343845 360251 353216
## 3 3820429 5435442 5666179 5505908
## 4 1941022 3457653 3716923 3263560
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## 11 0 0 0 0
fc1=mat=fc$counts
tfc1=t(fc1)
filter <- apply(fc1, 1, function(x) length(x[x>5])>=1)
fc1filt <- fc1[filter,]
genes <- rownames(fc1filt)
options(scipen=9)
map=as.numeric(targets$mapped)
names(map)=sapply(strsplit(targets$bam,split="\\."),function(x) x[1])
barplot(map, col=as.numeric(as.factor(targets$Phenotype)),las = 2,cex.axis = 0.7)
library(ggplot2)
library(reshape)
fc1filt=as.data.frame(fc1filt)
fc1filtnames=sapply(strsplit(colnames(fc1filt),split="_"),function(x) x[2])
fc1filtnames=sapply(strsplit(fc1filtnames,split="\\."),function(x) x[1])
colnames(fc1filt)=paste(fc1filtnames,targets$Phenotype)
df.m <- melt(as.data.frame(fc1filt))
ggplot(df.m) +
geom_density(aes(x = value, colour = variable)) + labs(x = NULL) +
theme(legend.position='right') + scale_x_log10() + ggtitle("Raw Counts")
par(mar=c(10,7,1,1))
boxplot(log(value)~variable,las=2,data=df.m,main="Raw Signal",
ylab="Counts",col=as.numeric(as.factor(targets$Phenotype)))
x <- DGEList(counts=fc$counts, genes=fc$annotation)
isexpr <- rowSums(cpm(x)>0.5) >= 2
hasannot <- rowSums(is.na(x$genes))==0
x <- x[isexpr & hasannot,keep.lib.sizes=FALSE]
#Number of filtered genes
dim(x)
## [1] 16253 12
x <- calcNormFactors(x)
par(mfrow=c(2,6))
for (i in 1:dim(x)[2]){
plotMD(cpm(x, log=TRUE), column=i)
abline(h=0, col="red", lty=2, lwd=2)
}
#Do analysis for entire group
celltype <- factor(targets$Phenotype)
design <- model.matrix(~celltype)
design
## (Intercept) celltypeedar/edar mutant celltypefgf20lz/+ control
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 1 0
## 5 1 1 0
## 6 1 1 0
## 7 1 0 1
## 8 1 0 1
## 9 1 0 0
## 10 1 0 1
## 11 1 0 0
## 12 1 0 0
## celltypefgf20lz/lz mutant
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 1
## 10 0
## 11 1
## 12 1
## attr(,"assign")
## [1] 0 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$celltype
## [1] "contr.treatment"
v <- voom(x,design,plot=TRUE,normalize="quantile")
colnames(v$E)=paste(fc1filtnames,targets$Phenotype)
df.m <- melt(as.data.frame(v$E))
## Using as id variables
ggplot(df.m) +
geom_density(aes(x = value, colour = variable)) + labs(x = NULL) +
theme(legend.position='right') + ggtitle("Normalized Counts")
par(mar=c(10,7,1,1))
boxplot(value~variable,las=2,data=df.m,main="Normalized Signal",
ylab="Counts",col=as.numeric(as.factor(targets$Phenotype)))
edf=as.matrix(v$E)
tedf= t(edf)
pca=prcomp(tedf,scale.=T)
tedf1 = data.frame(tedf)
Phenotype=sapply(strsplit(targets$Phenotype,split=" "),function(x) x[1])
cell_rep=paste(Phenotype,targets$Batch,sep=".")
tedf1$group = as.factor(Phenotype)
plot(pca,type="lines") #Decide how many PC's are relevant for plotting
pca$x[,1:3] #look at first 3 PC's
## PC1 PC2 PC3
## e10 edar/+ control -46.8250401 -40.90604 28.098641
## e1 edar/+ control 59.6977305 -83.09294 -55.749286
## e2 edar/+ control 1.1366238 -32.64203 67.385731
## e4 edar/edar mutant 77.3047336 -53.72389 1.141197
## e5 edar/edar mutant 8.3889563 -62.11561 39.384595
## e9 edar/edar mutant -149.3487422 -35.83906 -24.546535
## f1 fgf20lz/+ control 19.5901540 50.61451 5.055829
## f2 fgf20lz/+ control -68.5182693 52.18792 9.116811
## f4 fgf20lz/lz mutant -3.1247422 25.74435 -107.277643
## f5 fgf20lz/+ control 35.5886477 68.42053 2.288900
## f6 fgf20lz/lz mutant -0.5432889 71.26031 26.014322
## f7 fgf20lz/lz mutant 66.6532369 40.09195 9.087437
plot3d(pca$x[,1:3],col = as.integer(tedf1$group),type="s",size=2)
group.v<-as.vector(cell_rep)
text3d(pca$x, pca$y, pca$z, group.v, cex=1.0, adj = 1.2)
#rgl.postscript("pca3d_indiv.pdf","pdf")
You must enable Javascript to view this page properly.
targets[1:6,]
## bam Phenotype Batch mapped
## 1 Sample_e10.bam edar/+ control 1 76698538
## 2 Sample_e1.bam edar/+ control 2 96350389
## 3 Sample_e2.bam edar/+ control 2 98212381
## 4 Sample_e4.bam edar/edar mutant 2 104174178
## 5 Sample_e5.bam edar/edar mutant 2 97482428
## 6 Sample_e9.bam edar/edar mutant 1 56689636
celltype <- factor(targets$Phenotype[1:6])
Batch <- factor(targets$Batch[1:6])
cell_rep=paste(celltype,Batch,sep=".")
design1 <- model.matrix(~Batch+celltype)
design1
## (Intercept) Batch2 celltypeedar/edar mutant
## 1 1 0 0
## 2 1 1 0
## 3 1 1 0
## 4 1 1 1
## 5 1 1 1
## 6 1 0 1
## attr(,"assign")
## [1] 0 1 2
## attr(,"contrasts")
## attr(,"contrasts")$Batch
## [1] "contr.treatment"
##
## attr(,"contrasts")$celltype
## [1] "contr.treatment"
v1 <- voom(x[,1:6],design1,plot=TRUE,normalize="quantile")
edf=as.matrix(v$E[,1:6])
tedf= t(edf)
pca=prcomp(tedf,scale.=T)
tedf1 = data.frame(tedf)
Phenotype=sapply(strsplit(targets$Phenotype[1:6],split=" "),function(x) x[1])
Batch=targets$Batch[1:6]
tedf1$group = as.factor(Phenotype)
plot(pca,type="lines") #Decide how many PC's are relevant for plotting
pca$x[,1:3] #look at first 3 PC's
## PC1 PC2 PC3
## e10 edar/+ control -39.58156 -36.729254 51.561576
## e1 edar/+ control 69.60465 -94.937753 -35.224601
## e2 edar/+ control 9.08715 21.257686 79.763216
## e4 edar/edar mutant 81.62089 63.120079 -46.958472
## e5 edar/edar mutant 18.99469 40.427968 4.626303
## e9 edar/edar mutant -139.72581 6.861275 -53.768022
plot3d(pca$x[,1:3],col = as.integer(tedf1$group),type="s",size=2)
group.v<-as.vector(paste(Phenotype,Batch))
text3d(pca$x, pca$y, pca$z, group.v, cex=1.0, adj = 1.2)
#rgl.postscript("pca3d_indiv_edar_batch.pdf","pdf")
You must enable Javascript to view this page properly.
#Process the Fgf group
targets[7:12,]
## bam Phenotype Batch mapped
## 7 Sample_f1.bam fgf20lz/+ control 3 75223785
## 8 Sample_f2.bam fgf20lz/+ control 3 81814741
## 9 Sample_f4.bam fgf20lz/lz mutant 3 53524329
## 10 Sample_f5.bam fgf20lz/+ control 3 79315207
## 11 Sample_f6.bam fgf20lz/lz mutant 3 84455037
## 12 Sample_f7.bam fgf20lz/lz mutant 3 80703536
celltype <- factor(targets$Phenotype[7:12])
design2 <- model.matrix(~celltype)
design2
## (Intercept) celltypefgf20lz/lz mutant
## 1 1 0
## 2 1 0
## 3 1 1
## 4 1 0
## 5 1 1
## 6 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$celltype
## [1] "contr.treatment"
v2 <- voom(x[,7:12],design2,plot=TRUE,normalize="quantile")
#fgf:
edf=as.matrix(v$E[,7:12])
tedf= t(edf)
pca=prcomp(tedf,scale.=T)
tedf1 = data.frame(tedf)
Phenotype=sapply(strsplit(targets$Phenotype[7:12],split=" "),function(x) x[1])
tedf1$group = as.factor(Phenotype)
plot(pca,type="lines") #Decide how many PC's are relevant for plotting
pca$x[,1:3] #look at first 3 PC's
## PC1 PC2 PC3
## f1 fgf20lz/+ control -15.461277 15.64215 -14.89205
## f2 fgf20lz/+ control 126.034997 21.70746 -43.05940
## f4 fgf20lz/lz mutant 5.505962 -132.36504 17.91359
## f5 fgf20lz/+ control -37.291128 29.80851 59.56308
## f6 fgf20lz/lz mutant 11.728095 51.33417 54.30037
## f7 fgf20lz/lz mutant -90.516648 13.87275 -73.82559
plot3d(pca$x[,1:3],col = as.integer(tedf1$group),type="s",size=2)
group.v<-as.vector(Phenotype)
text3d(pca$x, pca$y, pca$z, group.v, cex=1.0, adj = 1.2)
#rgl.postscript("pca3d_indiv_fgf.pdf","pdf")
You must enable Javascript to view this page properly.
genes=c("Sox2","Atoh1","Shh","Gli1","Edar","Traf6","Tnfrsf19","Lta","Ltb","Nfkb1")
vdf=cbind(v1$E,v2$E)
vdf=vdf[,c(1,2,3,4,5,6,7,8,10,9,11,12)]
genes_count=vdf[rownames(vdf) %in% genes,]
for (i in 1:length(genes)){
gbar=vdf[rownames(vdf)==genes[i],]
if(length(gbar)>0){
names(gbar)=targets$Phenotype[match(names(gbar),targets$bam)]
names(gbar)= sapply(strsplit(names(gbar),split=" "),function(x) x[1])
groupcol=as.factor(names(gbar))
barplot(2^gbar,col=as.numeric(groupcol), las = 2,main=paste(genes[i]," Counts: Normalized",sep=""),ylab="normalized cpm")
}
}
####Statistical Analysis of Edar group (lmFit)
#Run analysis of edar group:
fit1 <- lmFit(v1,design1)
fit1 <- eBayes(fit1)
options(digits=3)
topgenes1=topTable(fit1,coef=3,n=50,sort.by="p")
FC = 2^(fit1$coefficients[,3])
FC = ifelse(FC<1,-1/FC,FC)
finalres=topTable(fit1,coef=3,sort="none",n=Inf)
suppressPackageStartupMessages(library(googleVis))
library(googleVis)
op <- options(gvis.plot.tag='chart')
volcano_data=as.data.frame(cbind(fit1$coefficients[,3],-1*log10(fit1$p.value[,3])))
volcano_data$pop.html.tooltip=rownames(volcano_data)
yval=ceiling(max(fit1$coefficients[,3]))
Scatter <- gvisScatterChart(volcano_data,
options=list(
tooltip="{isHtml:'True'}",
legend='none',
lineWidth=0, pointSize=1,
title='Edar mut vs het', vAxis="{title:'Log Odds'}",
hAxis="{title:'Log Fold Change'}",
width=1200, height=800,
hAxes="[{viewWindowMode:'explicit', viewWindow:{min:-10, max:10}}]",
vAxes="[{viewWindowMode:'explicit', viewWindow:{min:0, max:12}}]"))
plot(Scatter)
Based on the linear model:
Signal = Celltype + Batch + Residual
#Look up Batch Effect:
fit1$design
## (Intercept) Batch2 celltypeedar/edar mutant
## 1 1 0 0
## 2 1 1 0
## 3 1 1 0
## 4 1 1 1
## 5 1 1 1
## 6 1 0 1
## attr(,"assign")
## [1] 0 1 2
## attr(,"contrasts")
## attr(,"contrasts")$Batch
## [1] "contr.treatment"
##
## attr(,"contrasts")$celltype
## [1] "contr.treatment"
head(fit1$coefficients)
## (Intercept) Batch2 celltypeedar/edar mutant
## Xkr4 1.05 0.0100 0.6673
## Sox17 2.89 -0.0827 0.0601
## Mrpl15 5.75 0.1398 0.0595
## RP23-34E15.5 -1.10 0.1745 0.1878
## Lypla1 6.00 -0.2218 0.2376
## Tcea1 6.33 -0.0728 0.0421
targets
## bam Phenotype Batch mapped
## 1 Sample_e10.bam edar/+ control 1 76698538
## 2 Sample_e1.bam edar/+ control 2 96350389
## 3 Sample_e2.bam edar/+ control 2 98212381
## 4 Sample_e4.bam edar/edar mutant 2 104174178
## 5 Sample_e5.bam edar/edar mutant 2 97482428
## 6 Sample_e9.bam edar/edar mutant 1 56689636
## 7 Sample_f1.bam fgf20lz/+ control 3 75223785
## 8 Sample_f2.bam fgf20lz/+ control 3 81814741
## 9 Sample_f4.bam fgf20lz/lz mutant 3 53524329
## 10 Sample_f5.bam fgf20lz/+ control 3 79315207
## 11 Sample_f6.bam fgf20lz/lz mutant 3 84455037
## 12 Sample_f7.bam fgf20lz/lz mutant 3 80703536
v1coeff = fit1$coefficients
#Remove batch effect from Batch2
w1=v1
w1$E=v1$E
w1$E[,2:5]=v1$E[,2:5]-v1coeff[,2]
library(d3heatmap)
#Original Heatmap (notice batch effect)
topgenes_data=v1$E[rownames(v1$E) %in% rownames(topgenes1),]
topgenes_data=topgenes_data[match(rownames(topgenes1),rownames(topgenes_data)),]
colnames(topgenes_data)=targets$Phenotype[match(colnames(topgenes_data),targets$bam)]
colnames(topgenes_data)= sapply(strsplit(colnames(topgenes_data),split=" "),function(x) x[1])
colnames(topgenes_data)=paste(colnames(topgenes_data),targets$Batch[1:6])
d3heatmap(topgenes_data, scale = "row", dendrogram = "none", main = "title",
color = "YlOrRd",cexRow=1,main="topgenes")
v1res=(residuals.MArrayLM(fit1,v1)) # calculate residuals
head(v1res)
## Sample_e10.bam Sample_e1.bam Sample_e2.bam Sample_e4.bam
## Xkr4 0.0964 -0.187085 0.1092 0.40636
## Sox17 0.0393 -0.080253 0.0478 0.00823
## Mrpl15 -0.0704 0.000611 0.0643 -0.01613
## RP23-34E15.5 0.1190 0.029394 -0.1242 0.36271
## Lypla1 -0.1017 -0.037356 0.1373 -0.06012
## Tcea1 -0.0426 0.071711 -0.0302 0.02089
## Sample_e5.bam Sample_e9.bam
## Xkr4 -0.3751 -0.0897
## Sox17 0.0237 -0.0506
## Mrpl15 -0.0480 0.0790
## RP23-34E15.5 -0.2948 -0.1375
## Lypla1 -0.0359 0.1068
## Tcea1 -0.0625 0.0456
v1fit=(fitted.MArrayLM(fit1,design1)) # get fitted values
head(v1fit)
## 1 2 3 4 5 6
## Xkr4 1.05 1.063 1.063 1.730 1.730 1.720
## Sox17 2.89 2.807 2.807 2.867 2.867 2.950
## Mrpl15 5.75 5.886 5.886 5.945 5.945 5.805
## RP23-34E15.5 -1.10 -0.929 -0.929 -0.741 -0.741 -0.915
## Lypla1 6.00 5.779 5.779 6.017 6.017 6.238
## Tcea1 6.33 6.261 6.261 6.303 6.303 6.375
head(v1$E) # original = fitted + residuals
## Sample_e10.bam Sample_e1.bam Sample_e2.bam Sample_e4.bam
## Xkr4 1.149 0.876 1.17 2.136
## Sox17 2.929 2.726 2.85 2.875
## Mrpl15 5.675 5.886 5.95 5.929
## RP23-34E15.5 -0.984 -0.899 -1.05 -0.378
## Lypla1 5.899 5.742 5.92 5.957
## Tcea1 6.291 6.332 6.23 6.324
## Sample_e5.bam Sample_e9.bam
## Xkr4 1.35 1.63
## Sox17 2.89 2.90
## Mrpl15 5.90 5.88
## RP23-34E15.5 -1.04 -1.05
## Lypla1 5.98 6.35
## Tcea1 6.24 6.42
topgenes_data=w1$E[rownames(w1$E) %in% rownames(topgenes1),]
topgenes_data=topgenes_data[match(rownames(topgenes1),rownames(topgenes_data)),]
colnames(topgenes_data)=targets$Phenotype[match(colnames(topgenes_data),targets$bam)]
colnames(topgenes_data)= sapply(strsplit(colnames(topgenes_data),split=" "),function(x) x[1])
colnames(topgenes_data)=paste(colnames(topgenes_data),targets$Batch[1:6])
d3heatmap(topgenes_data, scale = "row", dendrogram = "none",
color = "YlOrRd",cexRow=1,main="topgenes")
genes_data=v1$E[rownames(v1$E) %in% genes,]
colnames(genes_data)=targets$Phenotype[match(colnames(genes_data),targets$bam)]
colnames(genes_data)= sapply(strsplit(colnames(genes_data),split=" "),function(x) x[1])
d3heatmap(genes_data, scale = "row", dendrogram = "none", main="title",
color = "YlOrRd",cexRow=1)
genes_data=w1$E[rownames(w1$E) %in% genes,]
colnames(genes_data)=targets$Phenotype[match(colnames(genes_data),targets$bam)]
colnames(genes_data)= sapply(strsplit(colnames(genes_data),split=" "),function(x) x[1])
d3heatmap(genes_data, scale = "row", dendrogram = "none",
color = "YlOrRd",cexRow=1)
vdf=cbind(v1$E,w1$E)
genes_count=vdf[rownames(vdf) %in% genes,]
for (i in 1:length(genes)){
gbar=vdf[rownames(vdf)==genes[i],]
if(length(gbar)>0){
names(gbar)=targets$Phenotype[match(names(gbar),targets$bam)]
names(gbar)= sapply(strsplit(names(gbar),split=" "),function(x) x[1])
names(gbar)[7:12]=names(gbar)[1:6]
groupcol=as.factor(names(gbar))
colors = c("green", "orange", "green", "violet", "orange", "red", "pink", "cyan")
barplot(2^gbar,col=colors[as.numeric(groupcol)], las = 2,main=paste(genes[i]," Counts: Normalized",sep=""),ylab="normalized cpm")
}
}
edf=as.matrix(w1$E)
tedf= t(edf)
pca=prcomp(tedf,scale.=T)
tedf1 = data.frame(tedf)
Phenotype=sapply(strsplit(targets$Phenotype[1:6],split=" "),function(x) x[1])
Batch=targets$Batch[1:6]
tedf1$group = as.factor(Phenotype)
plot(pca,type="lines") #Decide how many PC's are relevant for plotting
pca$x[,1:3] #look at first 3 PC's
## PC1 PC2 PC3
## Sample_e10.bam -76.7 55.90 41.53
## Sample_e1.bam -94.5 -24.81 -87.94
## Sample_e2.bam 54.6 96.85 -8.42
## Sample_e4.bam -15.9 -73.78 89.63
## Sample_e5.bam 51.4 4.26 14.24
## Sample_e9.bam 81.0 -58.42 -49.03
plot3d(pca$x[,1:3],col = as.integer(tedf1$group),type="s",size=2)
group.v<-as.vector(paste(Phenotype,Batch))
text3d(pca$x, pca$y, pca$z, group.v, cex=1.0, adj = 1.2)
#rgl.postscript("pca3d_indiv_edar_batch.pdf","pdf")
You must enable Javascript to view this page properly.
finalres = cbind(w1$E,FC,finalres)
datadir="/Users/maggiec/Github/Maggie/ccbr620/Results"
setwd(datadir)
write.table(finalres,file="Gencode_FCpval_edar.txt",
row.names=TRUE,col.names=TRUE,sep="\t",quote=FALSE)
design2 <- model.matrix(~celltype)
fit2 <- lmFit(v2,design2)
fit2 <- eBayes(fit2)
options(digits=3)
topgenes2=topTable(fit2,coef=2,n=50,sort.by="p")
finalres2=topTable(fit2,coef=2,sort="none",n=Inf)
FC2 = 2^(fit2$coefficients[,2])
FC2 = ifelse(FC2<1,-1/FC2,FC2)
finalres2 = cbind(v2$E,FC2,finalres2)
datadir="/Users/maggiec/GitHub/Maggie/ccbr620/Results"
setwd(datadir)
write.table(finalres2,file="Gencode_FCpval_Fgf.txt",
row.names=TRUE,col.names=TRUE,sep="\t",quote=FALSE)
suppressPackageStartupMessages(library(googleVis))
library(googleVis)
op <- options(gvis.plot.tag='chart')
volcano_data=as.data.frame(cbind(fit2$coefficients[,2],-1*log10(fit2$p.value[,2])))
volcano_data$pop.html.tooltip=rownames(volcano_data)
yval=ceiling(max(fit2$coefficients[,2]))
Scatter <- gvisScatterChart(volcano_data,
options=list(
tooltip="{isHtml:'True'}",
legend='none',
lineWidth=0, pointSize=1,
title='Fgf mut vs het', vAxis="{title:'Log Odds'}",
hAxis="{title:'Log Fold Change'}",
width=1200, height=800,
hAxes="[{viewWindowMode:'explicit', viewWindow:{min:-10, max:10}}]",
vAxes="[{viewWindowMode:'explicit', viewWindow:{min:0, max:6}}]"))
plot(Scatter)
library(d3heatmap)
#Original Heatmap (notice batch effect)
w2=v2
w2$E=v2$E[,c(1,2,4,3,5,6)]
topgenes_data2=w2$E[rownames(w2$E) %in% rownames(topgenes2),]
topgenes_data2=topgenes_data2[match(rownames(topgenes2),rownames(topgenes_data2)),]
colnames(topgenes_data2)=targets$Phenotype[match(colnames(topgenes_data2),targets$bam)]
colnames(topgenes_data2)= sapply(strsplit(colnames(topgenes_data2),split=" "),function(x) x[1])
colnames(topgenes_data2)=paste(colnames(topgenes_data2),targets$Batch[7:12])
d3heatmap(topgenes_data2, scale = "row", dendrogram = "none", main = "title",
color = "YlOrRd",cexRow=1,main="topgenes")
genes_data=w2$E[rownames(w2$E) %in% genes,]
colnames(genes_data)=targets$Phenotype[match(colnames(genes_data),targets$bam)]
colnames(genes_data)= sapply(strsplit(colnames(genes_data),split=" "),function(x) x[1])
d3heatmap(genes_data, scale = "row", dendrogram = "none", main="title",
color = "YlOrRd",cexRow=1)
vdf=w2$E
for (i in 1:length(genes)){
gbar=vdf[rownames(vdf)==genes[i],]
if(length(gbar)>0){
names(gbar)=targets$Phenotype[match(names(gbar),targets$bam)]
names(gbar)= sapply(strsplit(names(gbar),split=" "),function(x) x[1])
groupcol=as.factor(names(gbar))
colors = c("blue", "yellow", "green", "violet", "orange", "red", "pink", "cyan")
barplot(2^gbar,col=colors[as.numeric(groupcol)], las = 2,main=paste(genes[i]," Counts: Normalized",sep=""),ylab="normalized cpm")
}
else {
paste(genes," is not present")
}
}
## R version 3.2.1 (2015-06-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.4 (Yosemite)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] d3heatmap_0.6.1 googleVis_0.5.10 reshape_0.8.5
## [4] knitr_1.11 rgl_0.95.1367 ggplot2_1.0.1
## [7] edgeR_3.12.0 limma_3.26.0 Rsubread_1.20.0
## [10] BiocInstaller_1.20.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.1 magrittr_1.5 MASS_7.3-44
## [4] munsell_0.4.2 colorspace_1.2-6 stringr_1.0.0
## [7] plyr_1.8.3 tools_3.2.1 grid_3.2.1
## [10] gtable_0.1.2 png_0.1-7 htmltools_0.2.6
## [13] yaml_2.1.13 digest_0.6.8 RJSONIO_1.3-0
## [16] RColorBrewer_1.1-2 reshape2_1.4.1 formatR_1.2.1
## [19] htmlwidgets_0.5 base64enc_0.1-3 evaluate_0.8
## [22] rmarkdown_0.8.1 labeling_0.3 stringi_0.5-5
## [25] scales_0.3.0 jsonlite_0.9.17 proto_0.3-10